function [net, currSample_video]=process_epoch_STGConvNet_miniBatch(para, net, currSample_video)
% PROCESS_EPOCH_STGCONVNET_MINIBATCH  Run one mini-batched learning epoch.
%
%   [net, currSample_video] = process_epoch_STGConvNet_miniBatch(para, net, currSample_video)
%
%   For every mini-batch the function
%     1. computes per-layer weight/bias statistics on the observed data,
%     2. updates the corresponding synthesized samples with
%        sampling_sequence_by_Langevin,
%     3. computes the same per-layer statistics on the synthesized samples,
%   and accumulates both sets of statistics as averages over all mini-batches.
%   After the last mini-batch the filters and biases of every layer are
%   updated with (observed - synthesized) statistics.
%
%   Inputs
%     para  struct; fields read here:
%             numBatch  - number of mini-batches
%             numChain  - number of synthesized samples per mini-batch
%             imageSet  - observed data, indexed along its 5th dimension
%             batch     - cell array; batch{i} holds the indices into
%                         imageSet (5th dim) that form mini-batch i
%             L, stepsize - forwarded to sampling_sequence_by_Langevin
%     net   struct with cell array net.layers; per layer the fields
%           filters, bias, pad, stride, numFilter and lambdaLearningRate
%           are read (filters and bias are also written).
%     currSample_video  5-D array of synthesized samples; slice
%           (:,:,:,:, (i-1)*numChain+1 : i*numChain) belongs to mini-batch i.
%
%   Outputs
%     net               network with updated filters and biases
%     currSample_video  synthesized samples after the Langevin updates


    numLayers=numel(net.layers);

    % Running averages (over mini-batches) of the per-layer statistics.
    % Entry l+1 belongs to layer l; entry 1 is never filled.
    res_obs_miniBatch = struct(...
        'stat_weights', cell(1,numLayers+1), ...
        'stat_bias', cell(1,numLayers+1));

    res_syn_miniBatch = struct(...
        'stat_weights', cell(1,numLayers+1), ...
        'stat_bias', cell(1,numLayers+1));


    for iBatch = 1: para.numBatch

        % range of synthesized samples that belongs to this mini-batch
        startID_sample= 1 + para.numChain * (iBatch -1);
        endID_sample= para.numChain + para.numChain * (iBatch -1);
        currSample_video_batch = gpuArray(currSample_video(:,:,:,:,startID_sample:endID_sample));

        imageSet_batch = gpuArray(para.imageSet(:,:,:,:,para.batch{iBatch}));
        numObserved = numel(para.batch{iBatch});


        %% compute statistics of the observed sequence
        % Only the current layer's input/output are kept alive; earlier
        % activations are not needed once their statistics are accumulated.
        layerInput = imageSet_batch;  % the first layer of response map is the observed signal

        for l=1:numLayers

            tic
            layer = net.layers{l};

            layerOutput = mex_conv3d(layerInput, layer.filters, layer.bias, 'pad', layer.pad, 'stride', layer.stride);
            layerOutput = vl_nnrelu(layerOutput);
            % vl_nnrelu called with a second argument of all ones:
            % presumably yields a 0/1 mask of the active units
            % (MatConvNet backward mode) - confirm against vl_nnrelu docs.
            indicator = vl_nnrelu(layerOutput,  gpuArray(ones(size(layerOutput),'single')));

            % second and third outputs of mex_conv3d, given the indicator,
            % are used as this layer's weight and bias statistics
            [~, stat_weights, stat_bias] = mex_conv3d(layerInput, layer.filters, layer.bias, indicator, 'pad', layer.pad, 'stride', layer.stride);

            % normalize by the number of observed samples in this mini-batch
            stat_weights = gather(stat_weights * (1/  numObserved));
            stat_bias = gather(stat_bias * (1/  numObserved));
            disp(['compute the observed statistics in layer '  num2str(l) ' : ' num2str(toc)]);


            %% average observed statistics over all miniBatches
            if isempty(res_obs_miniBatch(l+1).stat_weights)
                res_obs_miniBatch(l+1).stat_weights = stat_weights ./ para.numBatch;
                res_obs_miniBatch(l+1).stat_bias = stat_bias ./ para.numBatch;
            else
                res_obs_miniBatch(l+1).stat_weights = res_obs_miniBatch(l+1).stat_weights + stat_weights ./ para.numBatch;
                res_obs_miniBatch(l+1).stat_bias = res_obs_miniBatch(l+1).stat_bias + stat_bias ./ para.numBatch;
            end

            layerInput = layerOutput;  % feed the next layer

        end



        %% sampling by Langevin dynamics
        % NOTE(review): the meaning of the trailing `false` flag is defined in
        % sampling_sequence_by_Langevin and is not visible from this file.
        currSample_video_batch = sampling_sequence_by_Langevin(net, para.L, para.stepsize, currSample_video_batch, false);


        %% compute statistics of the synthesized sequence
        layerInput = currSample_video_batch;

        for l=1:numLayers

            tic
            layer = net.layers{l};

            layerOutput = mex_conv3d(layerInput, layer.filters, layer.bias, 'pad', layer.pad, 'stride', layer.stride);
            layerOutput = vl_nnrelu(layerOutput);
            indicator = vl_nnrelu(layerOutput, gpuArray(ones(size(layerOutput),'single' )));

            [~, stat_weights, stat_bias] = mex_conv3d(layerInput, layer.filters, layer.bias, indicator, 'pad', layer.pad, 'stride', layer.stride);

            % normalize by the number of synthesized samples per mini-batch
            stat_weights = gather(stat_weights * (1/para.numChain));
            stat_bias = gather(stat_bias * (1/para.numChain));

            disp(['compute the synthesized statistics in layer '  num2str(l) ' : ' num2str(toc)]);

            %% average synthesized statistics over all miniBatches
            if isempty(res_syn_miniBatch(l+1).stat_weights)
                res_syn_miniBatch(l+1).stat_weights = stat_weights ./ para.numBatch;
                res_syn_miniBatch(l+1).stat_bias = stat_bias ./ para.numBatch;
            else
                res_syn_miniBatch(l+1).stat_weights = res_syn_miniBatch(l+1).stat_weights + stat_weights ./ para.numBatch;
                res_syn_miniBatch(l+1).stat_bias = res_syn_miniBatch(l+1).stat_bias + stat_bias ./ para.numBatch;
            end

            layerInput = layerOutput;  % feed the next layer

        end

        disp(['batch: '  num2str(iBatch) ' of ' num2str(para.numBatch)]);
        % write the updated samples back to host memory
        currSample_video(:,:,:,:,startID_sample:endID_sample) = gather(currSample_video_batch);

    end


    for l=1:numLayers

        %% compute the gradient for weights and bias
        gradient_weight = res_obs_miniBatch(l+1).stat_weights - res_syn_miniBatch(l+1).stat_weights;
        gradient_bias = res_obs_miniBatch(l+1).stat_bias - res_syn_miniBatch(l+1).stat_bias;

        disp(['Layer ' num2str(l) ': SSD_weight: ' num2str(mean(abs(gradient_weight(:))))]);

        %% update the weights and bias
        % The per-filter step size is scaled by the observed bias statistic
        % averaged over ALL mini-batches, i.e. the same quantity the gradient
        % is built from. (Previously the statistic of only the last processed
        % mini-batch was used, making the step size depend on batch order.)
        obs_stat_bias = res_obs_miniBatch(l+1).stat_bias;
        for iFilter = 1:net.layers{l}.numFilter
            adaptivelambdaLearningRate = net.layers{l}.lambdaLearningRate / (obs_stat_bias(iFilter) +eps);
            net.layers{l}.filters(:,:,:,:,iFilter)=net.layers{l}.filters(:,:,:,:,iFilter)+ adaptivelambdaLearningRate * gradient_weight(:,:,:,:,iFilter);
            net.layers{l}.bias(iFilter) = net.layers{l}.bias(iFilter) + adaptivelambdaLearningRate * gradient_bias(iFilter);
        end

    end
    